# Code to reproduce analysis presented in:
#     Back to School: The Effects of School Reopening on Parents and Children
# Jens Olav Dahlgaard and Zoltan Fazekas
# February 2021


# packages, functions and data --------------------------------------------
# some packages are not strictly necessary as table export are commented out

library("tidyverse")
library("scales")
library("AER")
library("clusterSEs")
library("texreg")
library("xtable")

# create function for standard error
std_error <- function(x){
  sqrt(var(x, na.rm = TRUE) / sum(!is.na(x)))
}
# get the mean, even if missing
mean_na <- function(x) {
  mean(x, na.rm = TRUE)
}

# get means and standard error of the mean for particular categorical groups
# important note: assumes grouping prior (grouped_df)
get_means_se <- function(data, catvars){
  means <- data %>% 
    summarize_at(all_of(catvars), mean_na) %>% 
    pivot_longer(all_of(catvars), names_to = "variable", values_to = "mean")
  ses <- data %>% 
    summarize_at(all_of(catvars), std_error) %>% 
    pivot_longer(all_of(catvars), names_to = "variable", values_to = "se") %>% 
    ungroup() %>% 
    dplyr::select(se)
  out_res <- bind_cols(means, ses) %>% 
    dplyr::select(group_vars(data), variable, mean, se)
  return(out_res)
}

# configure plots
theme_custom <- function (base_size = 10, base_family = "Times") {
    theme_minimal(base_size = base_size, base_family = base_family) %+replace% 
      theme(panel.border = element_blank(),
            legend.position = "bottom",
            panel.spacing = unit(1.5, "lines"),
            strip.background = element_rect(colour = NA, fill = NA))   
}
theme_set(theme_custom())

# load full data: multiple different datasets already shaped for different analyses
# all can be derived from dta file, if necessary
# inspect the data and dimensions for more information on its format
# all can be loaded as separate .csv files as well
load("data/covid-reopening.Rdata")

# select and label outcomes (for plots, models and summary)
# for full analysis covid related question (last 4) will not be used
# referenced in discussion
outcomes_var <- c("stress_1", "stress_2", "stress_3", "stress_all", 
                  "pm_performance", "health_performance",
                  "econ_imp_hh", "work_week_red", "empl_concern",
                  paste0("covid_", c(1:4, "alt")))
outcomes_all <- str_c(outcomes_var, rep(c("_w1", "_w2"), 
                                        each = length(outcomes_var)))
relab_out <- data.frame(outcome = outcomes_var,
                        out_lab = outcome_labs <- c("Hard to wind down", "Rather touchy", 
                                                    "Intolerant of anything\nthat kept me from...",
                                                    "Parent stress", 
                                                    "PM support", 
                                                    "Health Authority support",
                                                    "Economic well-being",
                                                    "Work capacity", "Job outlook",
                                                    "Tested positive",
                                                    "Tested negative",
                                                    "Showed symptoms", 
                                                    "Showed no symptoms",
                                                    "Tested positive or showed symptoms"),
                        out_cat = c(rep("stress", 4),
                                    rep("pol", 2), rep("econ", "3"),
                                    rep("health", 5)))

child_var <- c(paste0("well_", 1:3), "well_all")
relab_child <- data.frame(child_var = stringr::str_c(child_var, rep(c("_w1", "_w2"), each = 4)),
                          child_lab = c("Feeling relaxed", "Dealing well",
                                        "Make up mind", "Child well-being"))

## covariates (other): used in balance checks
prelim_var <- c("PB_Age", "female", "cph_region", "zealand_region", 
                "south_region", "mid_region", "north_region", "higher_edu", "hh_income")
prelim_all <- str_c(prelim_var, rep(c("_w1", "_w2"), 
                                        each = length(prelim_var)))
relab_prelim <- data.frame(prelim     = prelim_var,
                           prelim_lab = c("Age (years)", "Female", "Copenhagen Region",
                                          "Zealand Region", "South Denmark Region",
                                          "Mid Jutland Region", "North Jutland Region",
                                          "Higher Education", "Household Income (1000 DKK)"))


# mean comparisons through regression, Wave 1-Wave 2 --------------------------------------------
dta_reg <- dta %>% data.frame()
dta_reg[, outcomes_all] <- apply(dta_reg[, outcomes_all], 2, 
                                 function (x) rescale(x, to = c(0, 100)))

dta_reg <- mutate(dta_reg, 
                  inc_missing = is.na(hh_income),
                  hh_income   = ifelse(inc_missing == 1, mean(hh_income, na.rm = TRUE), hh_income))

coef_mat   <- matrix(NA, nrow = 10, ncol = 2)
outcomes   <- outcomes_var[1:9]
mean_table <- matrix(NA, ncol = 5, nrow = 10)

mean_table[, 1] <- round(unlist(get_means_se(dta_reg, 
                                             paste0(outcomes_var[c(14, 1:9)], "_w1"))[ ,2]), 1)
mean_table[, 2] <- round(unlist(get_means_se(dta_reg, 
                                             paste0(outcomes_var[c(14, 1:9)], "_w2"))[ ,2]), 1)

for(i in 1:9){ # intercept only models of W2 - W1
  mod1 <- lm(unlist(dta_reg[, paste0(outcomes[i], "_w2")] - dta_reg[, paste0(outcomes[i], "_w1")]) ~ 1, 
             data = dta_reg)
  mean_table[i + 1, 3:4] <-  round(summary(mod1)$coefficients[1, 1:2], 1)
  mean_table[i + 1, 5]   <- paste0("[", round(confint(mod1)[1], 1), "; ", 
                                        round(confint(mod1)[2], 1), "]")
}

mod1 <- lm(covid_alt_w2 - covid_alt_w1 ~ 1, data = dta_reg)

mean_table[1 , 3:4] <- round(summary(mod1)$coefficients[1, 1:2])
mean_table[1, 5]    <- paste0("[", round(confint(mod1)[1], 1), "; ", 
                                   round(confint(mod1)[2], 1), "]")

rownames(mean_table)    <- outcome_labs[c(14, 1:9)]
rownames(mean_table)[4] <- "Intolerant of anything that kept me from..."
colnames(mean_table)    <- c("Wave 1", "Wave 2", "Mean difference", "Std. error", "95% CI")

# same for children level outcome
ch <- dplyr::select(children_wide, starts_with("well")) %>% data.frame()

kid_table      <- matrix(NA, ncol = 5, nrow = 4)
kid_table[, 1] <- round(unlist(get_means_se(ch, names(ch)[str_detect(names(ch), "w1")])[ ,2]), 1)
kid_table[, 2] <- round(unlist(get_means_se(ch, names(ch)[str_detect(names(ch), "w2")])[ ,2]), 1)

kid_out <- paste0("well_", c(1:3, "all"))
for(i in 1:4){
  mod1 <- lm(unlist(ch[, paste0(kid_out[i], "_w2")] - ch[, paste0(kid_out[i], "_w1")]) ~ 1, 
             data = ch)
  kid_table[i, 3:4] <- round(summary(mod1)$coefficients[1, 1:2], 1)
  kid_table[i, 5]   <- paste0("[", round(confint(mod1)[1], 1), "; ", round(confint(mod1)[2], 1), "]")
}
rownames(kid_table) <- c("Feeling relaxed (child)", "Dealing well (child)", 
                         "Make up mind (child)", "Child well-being")

# xtable(rbind(mean_table[2:nrow(mean_table),
#                         c(1, 2, 3, 5)], kid_table[, c(1, 2, 4, 5)]))


# plotting mean comparisons --------------------------------------------
# Appendix A1 and Table 1 for balance
# covariate balance
prelims <- prelim %>% ungroup() %>% group_by(grade, prelim_lab)
prelims <- get_means_se(prelims, "values")
prelims <- prelims %>%
  mutate(prelim_lab = factor(prelim_lab,
                             levels = c("Age (years)", "Female", 
                                        "Higher Education", "Household Income (1000 DKK)",
                                        "Copenhagen Region", "Zealand Region", 
                                        "South Denmark Region", "Mid Jutland Region", 
                                        "North Jutland Region")))
plot <- 
  ggplot(data = filter(prelims, !is.na(grade)), 
         aes(y = mean, 
             ymin = mean + qnorm(0.025) * se,
             ymax = mean + qnorm(0.975) * se,
             x = grade)) +
  geom_point(size = 2) +
  geom_errorbar(width = 0) + 
  facet_wrap(. ~ prelim_lab, scales = "free_y", ncol = 3) +
  labs(y = "Mean", x = "Grade")
last_plot()

# uncomment to save
# ggsave(plot, filename = "covariates.pdf", width = 6.5, height = 4)

# parental outcomes
stress <- filter(parents, str_detect(out_cat, "stress")) %>% ungroup()
stress <- stress %>% group_by(grade, wave, out_lab)
stress <- get_means_se(stress, "values")
stress <- mutate(stress,
                 out_lab = factor(out_lab, 
                                  levels = c("Hard to wind down", "Rather touchy", 
                                                    "Intolerant of anything\nthat kept me from...",
                                                    "Parent stress")))


pol <- filter(parents, str_detect(out_cat, "pol")) %>% ungroup()
pol <- pol %>% group_by(grade, wave, out_lab)
pol <- get_means_se(pol, "values")
pol <- mutate(pol,
                 out_lab = factor(out_lab, 
                                  levels = c("PM support", "Health Authority support")))


econ <- filter(parents, str_detect(out_cat, "econ")) %>% ungroup()
econ <- econ %>% group_by(grade, wave, out_lab)
econ <- get_means_se(econ, "values")
econ <- mutate(econ,
                 out_lab = factor(out_lab, 
                                  levels = c("Economic well-being", 
                                             "Work capacity",
                                             "Job outlook")))

# combined plot
all_out <- bind_rows(stress, pol, econ)
all_out$out_lab <- factor(all_out$out_lab,
                          levels = c("Hard to wind down", "Rather touchy", 
                                                      "Intolerant of anything\nthat kept me from...",
                                                      "Parent stress", 
                                                      "PM support", 
                                                      "Health Authority support",
                                                      "Economic well-being",
                                                      "Work capacity", "Job outlook"))
all_out$wave <- factor(all_out$wave,
                       levels = c("April\n15-22",
                                  "April 28\nMay 6"))
plot <-
  ggplot(data = filter(all_out, !is.na(grade)),
         aes(y = mean,
             ymin = mean + qnorm(0.025) * se,
             ymax = mean + qnorm(0.975) * se,
             x = wave, group = grade,
             color = grade)) +
  geom_errorbar(width = 0, position = position_dodge(width = 0.25)) +
  geom_point(size = 2, position = position_dodge(width = 0.25)) +
  labs(y = "Mean", x = "") +
  facet_wrap(~out_lab, ncol = 3, scales = "free_y") +
  scale_colour_grey(start = 0.2, end = 0.8, aesthetics = "color", "Children's grade")
last_plot()

# uncomment for saving
# ggsave(plot, filename = "desc_parent.pdf", width = 6.5, height = 6)

# same mean comparison for child level outcomes
children <- children %>% ungroup() %>% 
  group_by(grade, wave, child_lab)
cmeans <- get_means_se(children, "values")
cmeans <- mutate(cmeans,
                 child_lab = factor(child_lab, 
                                    levels = c("Feeling relaxed", "Dealing well",
                                               "Make up mind", "Child well-being")))
cmeans$wave <- factor(cmeans$wave,
                       levels = c("April\n15-22",
                                  "April 28\nMay 6"))
plot <-
  ggplot(data = cmeans,
         aes(y = mean,
             ymin = mean + qnorm(0.025) * se,
             ymax = mean + qnorm(0.975) * se,
             x = wave, group = grade,
             color = grade)) +
  geom_errorbar(width = 0, position = position_dodge(width = 0.25)) +
  geom_point(size = 2, position = position_dodge(width = 0.25)) +
  labs(y = "Mean", x = "") +
  facet_wrap(~child_lab, ncol = 4) +
  scale_colour_grey(start = 0.2, end = 0.8, aesthetics = "color", "Children's grade")
last_plot()

# uncomment for saving
# ggsave(plot, filename = "desc_child.pdf", width = 6.5, height = 3)

# regressions and coefficient plots -------------------------------------------
# Main text, Figure 2 (parents), Figure 3 (children)
# Material reported in table format in Appendix A2

coef_mat <- matrix(NA, nrow = 18, ncol = 3)
outcomes <- outcomes_var[1:9] # not modeling positive tests self report

dta_reg <- dta %>% data.frame()
dta_reg[, outcomes_all] <- apply(dta_reg[, outcomes_all], 2, 
	function (x) {rescale(x, to = c(0, 100))})

outcomes_all <- outcomes_all[str_detect(outcomes_all, "covid", negate = TRUE)]

dta_reg <- mutate(dta_reg, 
              inc_missing = is.na(hh_income),
              hh_income   = ifelse(inc_missing == 1, mean(hh_income, na.rm = TRUE), hh_income))

# without and with controls
for(i in 1:9){
  mod1 <- lm(unlist(dta_reg[, paste0(outcomes[i], "_w2")] - dta_reg[, paste0(outcomes[i], "_w1")]) ~ grade, data = dta_reg)
  mod2 <- lm(unlist(dta_reg[, paste0(outcomes[i], "_w2")] - dta_reg[, paste0(outcomes[i], "_w1")]) ~ grade + PB_Gender + PB_Age + as.factor(PB_Region) + higher_edu + hh_income + inc_missing, data = dta_reg)
  coef_mat[i    , 1:2] <- summary(mod1)$coefficients[2, 1:2]
  coef_mat[i + 9, 1:2] <- summary(mod2)$coefficients[2, 1:2]
  coef_mat[i    , 3] <- outcomes[i]
  coef_mat[i + 9, 3] <- outcomes[i]
  # uncomment to save in an output folder the regression tables
  # texreg(
  #        file = paste0("output/", outcomes[i], "-mod.tex"),
  #        list(mod1, mod2),
  #        ci.force = TRUE, digits = 2, custom.note = "",
  #        custom.coef.names = c("Intercept", "Stayed home (6-7th grade)",
  #                              "Gender", "Age", "Region2", "Region3", "Region4",
  #                              "Region5", "Higher education", "Income", "Incl. missing"),
  #        label = paste0("tab:",outcomes[i]), caption.above = TRUE, single.row = TRUE,
  #        caption = paste0("Parental model results: ", relab_out$out_lab[i]),
  #        use.packages = FALSE,
  #        float.pos = "!h")
}

coef_plot <- 
  tibble(est    = as.numeric(coef_mat[, 1]), se = as.numeric(coef_mat[, 2]), 
         outcome = coef_mat[, 3],
         controls = rep(c("No controls", "With controls"), each = 9)) 
coef_plot <- left_join(coef_plot, relab_out, by = "outcome")

coef_plot$out_lab <- factor(coef_plot$out_lab, 
                           levels = c("Hard to wind down", "Rather touchy", 
                                      "Intolerant of anything\nthat kept me from...",
                                      "Parent stress", 
                                      "PM support", 
                                      "Health Authority support",
                                      "Economic well-being",
                                      "Work capacity", "Job outlook"))

plot <-
  ggplot(data = coef_plot,
         aes(y = forcats::fct_rev(out_lab),
             x = est,
             xmin = est + se * qnorm(0.025),
             xmax = est + se * qnorm(0.975))) +
  geom_errorbarh(height = 0) +
  geom_point(size = 2) +
  facet_wrap(. ~ controls, ncol = 2) +
  labs(x = "Difference in change for parents of 6th/7th graders compared to parents of 4th/5th graders", 
       y = "") +
  geom_vline(xintercept = 0, alpha = 0.5, linetype = "dashed")
last_plot()

# uncomment for saving
# ggsave(plot, filename = "didplot.pdf", width = 6.5, height = 3)


# models for children
parent_cov <- dta_reg %>% 
  dplyr::select(c("id", "PB_Gender", "PB_Age", "PB_Region", 
  			      "higher_edu", "hh_income", "inc_missing"))

children_wide <- data.frame(children_wide)

children_wide[, str_detect(names(children_wide), "well")] <- apply(children_wide[, str_detect(names(children_wide), "well")], 2, 
																   function (x) {rescale(x, to = c(0, 100))})

children_wide <- left_join(children_wide, parent_cov, by = "id")
coef_mat <- matrix(NA, nrow = 8, ncol = 3)

# note: extended runtime, see boot.reps

for(i in 1:4){
  mod1 <- glm(unlist(children_wide[, paste0(child_var[i], "_w2")] - children_wide[, paste0(child_var[i], "_w1")]) ~
                grade, data = children_wide)
  mod1.cl <- cluster.wild.glm(mod1, dat = children_wide, cluster = ~id, boot.reps = 1000,
                              seed = 202021)
  mod2 <- glm(unlist(children_wide[, paste0(child_var[i], "_w2")] - children_wide[, paste0(child_var[i], "_w1")]) ~
                grade + as.factor(child) + PB_Gender + PB_Age + as.factor(PB_Region) + higher_edu + hh_income  +
                inc_missing, data = children_wide)
  mod2.cl <- cluster.wild.glm(mod2, dat = children_wide, cluster = ~id, boot.reps = 1000,
                              seed = 202021)
  
  coef_mat[i    , 1:3] <- c(as.numeric(coef(mod1)[2]), as.numeric(mod1.cl$ci[2, ]))
  coef_mat[i + 4, 1:3] <- c(as.numeric(coef(mod2)[2]), as.numeric(mod2.cl$ci[2, ]))
  
  ci.low.1  <- mod1.cl$ci[, 1]
  ci.high.1 <- mod1.cl$ci[, 2]
  ci.low.2  <- mod2.cl$ci[, 1]
  ci.high.2 <- mod2.cl$ci[, 2]
  
  # uncomment to save in an output folder the regression tables
  # texreg(list(mod1, mod2),
  #        override.ci.low  = list(ci.low.1, ci.low.2),
  #        override.ci.up = list(ci.high.1, ci.high.2),
  #        file = paste0("output/", child_var[i], "-mod.tex"),
  #        digits = 2, custom.note = "",
  #        custom.coef.names = c("Intercept", "Stayed home (6-7th grade)", "Child 2", "Child 3",
  #                              "Gender", "Age", "Region2", "Region3", "Region4",
  #                              "Region5", "Higher education", "Income", "Incl. missing"),
  #        label = paste0("tab:",child_var[i]), caption.above = TRUE, single.row = TRUE,
  #        caption = paste0("Child model results: ", relab_child$child_lab[i]),
  #        use.packages = FALSE,
  #        float.pos = "!h")
}

coef_plot <-
  tibble(est = coef_mat[, 1], lower = coef_mat[, 2], upper = coef_mat[, 3],
         place = rep(1:length(child_var), 2),
         labels = relab_child$child_lab,
         controls = rep(c("No controls", "With controls"), each = 4))

plot <-
  ggplot(data = coef_plot,
         aes(y = place,
             x = est,
             xmin = lower,
             xmax = upper)) +
  geom_errorbarh(height = 0) +
  geom_point(size = 2) +
  facet_wrap(. ~ controls, ncol = 2) +
  labs(x = "Difference in change for 6th/7th graders compared to 4th/5th graders", y = "") +
  scale_y_continuous(breaks = 1:4, labels = coef_plot$labels[1:4]) +
  geom_vline(xintercept = 0, alpha = 0.5, linetype = "dashed")
last_plot()

# uncomment for saving
# ggsave(plot, filename = "didplot_child.pdf", width = 6.5, height = 2)

# within family analysis --------------------------------------------
# Appendix 3
within_family <- pivot_longer(within_family,
                              cols = starts_with("diff"),
                              names_to = "item", values_to = "diff_score")
within_family <- within_family %>% ungroup() %>% group_by(wave, item)

wm <- get_means_se(within_family, "diff_score")
wm <- mutate(wm, lab = case_when(
  item == "diff_well1" ~ "Feeling relaxed",
  item == "diff_well2" ~ "Dealing well",
  item == "diff_well3" ~ "Make up mind",
  item == "diff_wellall" ~ "Child well-being"))

wm <- mutate(wm,
                 lab = factor(lab, levels = c("Feeling relaxed", "Dealing well",
                                        "Make up mind", "Child well-being")))

wm$wave <- factor(wm$wave,
                       levels = c("April\n15-22",
                                  "April 28\nMay 6"))

plot <-
  ggplot(data = wm,
         aes(y = mean,
             ymin = mean + qnorm(0.025) * se,
             ymax = mean + qnorm(0.975) * se,
             x = wave)) +
  geom_hline(yintercept = 0, alpha = 0.25, linetype = 3) +
  geom_errorbar(width = 0) +
  geom_point(size = 2) +
  labs(y = "Mean", x = "") +
  facet_wrap(~lab, ncol = 4)
last_plot()

# uncomment to save
# ggsave(plot, filename = "app_within.pdf", width = 6.5, height = 2)

# Note: Appendix 4 contains a full re-run, but including only first school
#       years around the cutoff, 5 or 6
#       Grouping variables and labeling needs to be recoded based on data from here
#       and all models refitted.

sessionInfo()